/*
 *******************************************************************************
 *
 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 ******************************************************************************/

#include "core/device.h"
#include "core/g_palSettings.h"
#include "core/platform.h"
#include "core/hw/gfxip/gfxDevice.h"
#include "core/hw/gfxip/palToScpcWrapper.h"
#include "core/hw/gfxip/pipeline.h"
#include "palElfPackagerImpl.h"
#include "palFile.h"
#include "palPipelineAbiProcessorImpl.h"

#if PAL_BUILD_GPUOPEN
#include "core/devDriverUtil.h"
#endif

using namespace Util;

namespace Pal
{

// Identifies what produced a serialized pipeline: the PAL driver itself or an external tool.  When a pipeline was
// produced by an external tool, Pipeline::ValidateLoad() does not check the header's buildId or settingsHash.
enum class SerializedPipelineGenerator : uint32
{
    Pal         = 0, // The pipeline was generated by the PAL driver.
    ExternTool  = 1, // The pipeline was generated by an external tool.
};

// Header stored in the ".pipelineHeader" section of a serialized pipeline.  It carries the information used for
// compatibility checks when loading a stored pipeline: Pipeline::ValidateLoad() compares these fields against the
// loading device and, for PAL-generated pipelines, against the current PAL build and settings.  Any mismatch causes
// the load to fail.
struct SerializedPipelineHeader
{
    uint32        deviceId;       // Device ID of the storing device (ChipProperties().deviceId).
    BuildUniqueId buildId;        // Identifier for a particular PAL build (the compile date and time of the
                                  // library that serialized the pipeline).
    MetroHash::Hash settingsHash; // Settings hash of the storing device (Device::GetSettingsHash()).

    // Serialize the base addresses of each VA range partition since some of those are baked into compiled shaders.
    gpusize vaRangeBaseAddr[static_cast<uint32>(VaRange::Count)];

    SerializedPipelineGenerator generator; // Indicates what generated this pipeline.
};

// Private structure used to store/load the data members of a pipeline object.  It is written to, and read back from,
// the ".pipelineData" section of a serialized pipeline.
struct SerializedData
{
    size_t          totalGpuMemSize; // NOTE(review): Pipeline::Store() leaves this zero-initialized and
                                     // Pipeline::LoadInit() never reads it -- confirm whether it is still needed.
    PipelineInfo    info;            // Copy of the pipeline's m_info.
    ShaderMetadata  shaderMetadata;  // Copy of the pipeline's m_shaderMetaData.
};

// =====================================================================================================================
// Fills in a build identifier using the date and time at which this source file was compiled (the __DATE__ and
// __TIME__ strings).  Each string is truncated if it is larger than the field which receives it.
static void PAL_STDCALL GetBuildTime(
    BuildUniqueId* pBuildId)
{
    const char CompileDate[] = __DATE__;
    const char CompileTime[] = __TIME__;

    // Begin with an all-zero identifier so that bytes not covered by the copies below have a known value.
    memset(pBuildId, 0, sizeof(*pBuildId));

    const size_t dateBytes = Min(sizeof(CompileDate), sizeof(pBuildId->buildDate));
    const size_t timeBytes = Min(sizeof(CompileTime), sizeof(pBuildId->buildTime));

    memcpy(&pBuildId->buildDate, CompileDate, dateBytes);
    memcpy(&pBuildId->buildTime, CompileTime, timeBytes);
}

// =====================================================================================================================
// Constructs a pipeline object with every member in its empty/zeroed state.
Pipeline::Pipeline(
    Device* pDevice,
    bool    isInternal)  // True for pipelines which are owned by PAL itself (i.e., RPM pipelines).
    :
    m_pDevice(pDevice),
    m_gpuMem(),
    m_gpuMemSize(0),
    m_pPipelineBinary(nullptr),
    m_pipelineBinaryLen(0),
    m_apiHwMapping()
{
    // The plain-data members have no constructors of their own, so zero them explicitly.
    memset(&m_perfDataInfo, 0, sizeof(m_perfDataInfo));
    memset(&m_shaderMetaData, 0, sizeof(m_shaderMetaData));
    memset(&m_info, 0, sizeof(m_info));

    m_apiHwMapping.u64All = 0;

    // Clear all of the flag bits before recording whether PAL owns this pipeline.
    m_flags.value      = 0;
    m_flags.isInternal = isInternal;
}

// =====================================================================================================================
// Returns any GPU memory bound to this pipeline to the device's memory manager and frees the saved pipeline binary.
Pipeline::~Pipeline()
{
    if (m_gpuMem.IsBound())
    {
        auto*const pMemMgr = m_pDevice->MemMgr();

        pMemMgr->FreeGpuMem(m_gpuMem.Memory(), m_gpuMem.Offset());
        m_gpuMem.Update(nullptr, 0);
    }

    PAL_SAFE_FREE(m_pPipelineBinary, m_pDevice->GetPlatform());
}

// =====================================================================================================================
// Destroys a pipeline object allocated via a subclass' CreateInternal()
void Pipeline::DestroyInternal()
{
    PAL_ASSERT(IsInternal());

    Platform*const pPlatform = m_pDevice->GetPlatform();
    Destroy();
    PAL_FREE(this, pPlatform);
}

// =====================================================================================================================
// Serializes this pipeline into a blob which a client can save to disk and later load instead of re-compiling all
// pipelines on application startup.  See IPipeline::Store for full details.
//
// On success, *pDataSize always receives the number of bytes required.  If pData is non-null, the blob is written to
// it provided the incoming *pDataSize is large enough; otherwise ErrorInvalidMemorySize is returned.
Result Pipeline::Store(
    size_t* pDataSize,
    void*   pData)
{
    ElfWriteContext<Platform> writeContext(m_pDevice->GetPlatform());

    Result result = Store(&writeContext);

    if (result == Result::Success)
    {
        const size_t requiredSize = writeContext.GetRequiredBufferSizeBytes();
        const bool   wantsData    = (pData != nullptr);

        if (wantsData && (requiredSize > *pDataSize))
        {
            // The caller's buffer cannot hold the whole blob.
            result = Result::ErrorInvalidMemorySize;
        }
        else if (wantsData)
        {
            writeContext.WriteToBuffer(static_cast<char*>(pData), requiredSize);
        }

        // Report the required size even when the caller's buffer was too small.
        *pDataSize = requiredSize;
    }

    return result;
}

// =====================================================================================================================
// Performs the store operation on a caller-provided write context.  This lets ICompoundState::Store place the
// pipeline data in the same context it uses for the rest of its states.
//
// Adds the ".pipelineHeader" and ".pipelineData" sections and then lets the subclass add its own data via
// Serialize().  Stops at, and returns, the first failure.
Result Pipeline::Store(
    ElfWriteContext<Platform>* pContext)
{
    constexpr uint32 NumVaRanges = static_cast<uint32>(VaRange::Count);

    SerializedPipelineHeader header = { };

    header.generator    = SerializedPipelineGenerator::Pal;
    header.deviceId     = m_pDevice->ChipProperties().deviceId;
    header.settingsHash = m_pDevice->GetSettingsHash();
    GetBuildTime(&header.buildId);

    const auto& memProps = m_pDevice->MemoryProperties();
    for (uint32 rangeIdx = 0; rangeIdx < NumVaRanges; ++rangeIdx)
    {
        header.vaRangeBaseAddr[rangeIdx] = memProps.vaRange[rangeIdx].baseVirtAddr;
    }

    Result result = pContext->AddBinarySection(".pipelineHeader", &header, sizeof(header));

    if (result == Result::Success)
    {
        SerializedData payload = { };

        payload.info           = m_info;
        payload.shaderMetadata = m_shaderMetaData;

        result = pContext->AddBinarySection(".pipelineData", &payload, sizeof(payload));
    }

    if (result == Result::Success)
    {
        result = Serialize(pContext);
    }

    return result;
}

// =====================================================================================================================
// Validates the read context to ensure it holds a stored pipeline which is compatible with the given device.
//
// Returns:
//   - ErrorBadPipelineData     if the ".pipelineHeader" section is missing or too small to contain a header.
//   - ErrorIncompatibleDevice  if the device ID or any VA range base address differs from the loading device.
//   - ErrorIncompatibleLibrary if a PAL-generated pipeline was stored by a different PAL build or with a different
//                              settings hash.
//   - Success                  otherwise.
Result Pipeline::ValidateLoad(
    const Device*                   pDevice,
    const ElfReadContext<Platform>& context)
{
    size_t readSize = 0;
    const SerializedPipelineHeader* pHeader = nullptr;

    Result result = GetLoadedSectionData(context,
                                         ".pipelineHeader",
                                         reinterpret_cast<const void**>(&pHeader),
                                         &readSize);

    // The stored data comes from outside of the driver: never dereference a header which is absent or truncated.
    if ((result == Result::Success) &&
        ((pHeader == nullptr) || (readSize < sizeof(SerializedPipelineHeader))))
    {
        result = Result::ErrorBadPipelineData;
    }

    if (result == Result::Success)
    {
        const MetroHash::Hash settingsHash = pDevice->GetSettingsHash();

        if (pHeader->deviceId != pDevice->ChipProperties().deviceId)
        {
            result = Result::ErrorIncompatibleDevice;
        }
        else if (pHeader->generator == SerializedPipelineGenerator::Pal)
        {
            // Only pipelines generated by PAL itself are tied to a specific PAL build and settings hash.
            Util::BuildUniqueId buildId;
            GetBuildTime(&buildId);

            if ((memcmp(pHeader->buildId.buildDate, buildId.buildDate, sizeof(buildId.buildDate)) != 0) ||
                (memcmp(pHeader->buildId.buildTime, buildId.buildTime, sizeof(buildId.buildTime)) != 0))
            {
                result = Result::ErrorIncompatibleLibrary;
            }
            else if (memcmp(&pHeader->settingsHash, &settingsHash, sizeof(MetroHash::Hash)) != 0)
            {
                result = Result::ErrorIncompatibleLibrary;
            }
        }

        if (result == Result::Success)
        {
            // VA range base addresses can be baked into compiled shaders, so they must match exactly.
            for (uint32 i = 0; i < static_cast<uint32>(VaRange::Count); ++i)
            {
                if (pHeader->vaRangeBaseAddr[i] != pDevice->MemoryProperties().vaRange[i].baseVirtAddr)
                {
                    result = Result::ErrorIncompatibleDevice;
                    break;
                }
            }
        }
    }

    return result;
}

// =====================================================================================================================
// Initializes this pipeline from the contents of an ELF created with a previous call to Pipeline::Store.
//
// Restores m_info and m_shaderMetaData from the ".pipelineData" section.  Returns ErrorBadPipelineData if that
// section is missing or too small to contain a SerializedData structure.
Result Pipeline::LoadInit(
    const ElfReadContext<Platform>& context)
{
    size_t readSize = 0;
    const SerializedData* pData = nullptr;

    Result result = GetLoadedSectionData(context, ".pipelineData", reinterpret_cast<const void**>(&pData), &readSize);

    // The stored data comes from outside of the driver: never read past the end of a truncated section.
    if ((result == Result::Success) && ((pData == nullptr) || (readSize < sizeof(SerializedData))))
    {
        result = Result::ErrorBadPipelineData;
    }

    if (result == Result::Success)
    {
        m_info = pData->info;
        m_shaderMetaData = pData->shaderMetadata;
    }

    return result;
}

// =====================================================================================================================
// Reads the ".pipelineType" section of the serialized pipeline data to find out which type of pipeline object was
// stored.  Returns PipelineTypeUnknown if the section cannot be read or does not have the expected size.
PipelineType Pipeline::DetermineLoadedPipelineType(
    const Device&                   device,
    const ElfReadContext<Platform>& context)
{
    const PipelineType* pStoredType = nullptr;
    size_t              storedSize  = 0;

    const Result readResult =
        context.GetSectionData(".pipelineType", reinterpret_cast<const void**>(&pStoredType), &storedSize);

    const bool isValid = (readResult == Result::Success) && (storedSize == sizeof(PipelineType));

    return isValid ? (*pStoredType) : PipelineTypeUnknown;
}

// =====================================================================================================================
// Helper method which fetches a named section of serialized pipeline data from an ELF.  Any failure reported by the
// read context is translated to ErrorBadPipelineData.
Result Pipeline::GetLoadedSectionData(
    const ElfReadContext<Platform>& context,
    const char*                     pName,
    const void**                    ppData,
    size_t*                         pDataLength)
{
    const Result readResult = context.GetSectionData(pName, ppData, pDataLength);

    return (readResult == Result::Success) ? Result::Success : Result::ErrorBadPipelineData;
}

// =====================================================================================================================
// Allocates GPU memory for this pipeline and uploads the code and data contained in the ELF binary to it.  Any ELF
// relocations for the data section are also applied to the memory during this operation.
//
// The allocation is laid out as:  [ code | padding | data | padding | per-stage performance data buffers ]
// where the data begins at the code length rounded up to the data section's alignment.
//
// On success, *pCodeGpuVirtAddr receives the GPU virtual address of the code.  *pDataGpuVirtAddr receives the GPU
// virtual address of the data only if the ELF has a non-empty data section; otherwise it is left untouched.
Result Pipeline::PerformRelocationsAndUploadToGpuMemory(
    const AbiProcessor& abiProcessor,
    gpusize*            pCodeGpuVirtAddr,
    gpusize*            pDataGpuVirtAddr)
{
    PAL_ASSERT((pCodeGpuVirtAddr != nullptr) && (pDataGpuVirtAddr != nullptr));

    constexpr size_t GpuMemByteAlign = 256;

    GpuMemoryCreateInfo createInfo = { };
    createInfo.alignment = GpuMemByteAlign;
    createInfo.vaRange   = VaRange::DescriptorTable;
    createInfo.heaps[0]  = GpuHeapLocal;
    createInfo.heaps[1]  = GpuHeapGartUswc;
    createInfo.heapCount = 2;
    createInfo.priority  = GpuMemPriority::High;

    GpuMemoryInternalCreateInfo internalInfo = { };
    internalInfo.flags.alwaysResident = 1;

    const void* pCodeBuffer = nullptr;
    size_t      codeLength  = 0;
    abiProcessor.GetPipelineCode(&pCodeBuffer, &codeLength);

    createInfo.size = codeLength;

    const void* pDataBuffer   = nullptr;
    size_t      dataLength    = 0;
    gpusize     dataAlignment = 0;

    abiProcessor.GetData(&pDataBuffer, &dataLength, &dataAlignment);

    // Byte offset of the data section from the start of the allocation.  The same offset is used for sizing the
    // allocation, for the CPU copy and for the GPU virtual address so that all three always agree.
    gpusize dataOffset = 0;

    if (dataLength > 0)
    {
        // The data starts at the *aligned* end of the code, so the padding between code and data must be part of the
        // allocation.  (Sizing it as "codeLength + aligned dataLength" is too small whenever codeLength is not a
        // multiple of the data alignment: the data would overrun into the performance data region or past the end
        // of the allocation.)
        dataOffset      = Pow2Align(codeLength, dataAlignment);
        createInfo.size = (dataOffset + Pow2Align(dataLength, dataAlignment));
    }

    const gpusize perfDataOffset = createInfo.size;
    createInfo.size += PerformanceDataSize(abiProcessor);

    GpuMemory* pGpuMem = nullptr;
    gpusize    offset  = 0;
    Result result = m_pDevice->MemMgr()->AllocateGpuMem(createInfo, internalInfo, false, &pGpuMem, &offset);
    if (result == Result::Success)
    {
        // Bind the memory right away: if anything below fails, the destructor is what releases it.
        m_gpuMemSize = createInfo.size;
        m_gpuMem.Update(pGpuMem, offset);

        void* pMappedPtr = nullptr;
        result = m_gpuMem.Map(&pMappedPtr);
        if (result == Result::Success)
        {
            (*pCodeGpuVirtAddr) = m_gpuMem.GpuVirtAddr();
            memcpy(pMappedPtr, pCodeBuffer, codeLength);

            if (dataLength > 0)
            {
                void* pDataPtr = VoidPtrInc(pMappedPtr, static_cast<size_t>(dataOffset));
                memcpy(pDataPtr, pDataBuffer, dataLength);

                (*pDataGpuVirtAddr) = ((*pCodeGpuVirtAddr) + dataOffset);
                abiProcessor.ApplyRelocations(pDataPtr, dataLength, Abi::AbiSectionType::Data, (*pDataGpuVirtAddr));
            }

            // Carve the tail of the allocation into one zero-filled performance data buffer per hardware stage which
            // requests one.
            gpusize perfGpuAddr   = m_gpuMem.GpuVirtAddr() + perfDataOffset;
            size_t  currentOffset = static_cast<size_t>(perfDataOffset);
            for (uint32 i = 0; i < static_cast<uint32>(Abi::HardwareStage::Count); i++)
            {
                Abi::PipelineMetadataType type =
                    Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderPerformanceDataBufferSize,
                                             static_cast<Abi::HardwareStage>(i));

                uint32 perfDataSize = 0;
                if (abiProcessor.HasPipelineMetadataEntry(type, &perfDataSize))
                {
                    m_perfDataInfo[i].sizeInBytes = perfDataSize;
                    m_perfDataInfo[i].cpuOffset   = currentOffset;
                    m_perfDataInfo[i].gpuVirtAddr = LowPart(perfGpuAddr);
                    memset(VoidPtrInc(pMappedPtr, m_perfDataInfo[i].cpuOffset), 0, perfDataSize);

                    perfGpuAddr   += perfDataSize;
                    currentOffset += perfDataSize;
                }
            }

            m_gpuMem.Unmap();
        }
    }

    return result;
}

// =====================================================================================================================
// Helper function for extracting the pipeline hash and per-shader hashes from a pipeline ABI processor.
void Pipeline::ExtractPipelineInfo(
    const AbiProcessor& abiProcessor,
    ShaderType          firstShader,
    ShaderType          lastShader)
{
    abiProcessor.HasPipelineMetadataEntries(Abi::PipelineMetadataType::PipelineHashHi,
                                            Abi::PipelineMetadataType::PipelineHashLo,
                                            &m_info.compilerHash);
    PAL_ALERT(m_info.compilerHash == 0); // We don't expect the pipeline ABI to report a hash of zero.

    // Default the pipeline hash to the compiler hash. PAL pipelines that include additional state should override this
    // with a new hash composed of that state and the compiler hash.
    m_info.pipelineHash = m_info.compilerHash;

    for (uint32 s = static_cast<uint32>(firstShader); s <= static_cast<uint32>(lastShader); ++s)
    {
        Abi::ApiShaderType shaderType = static_cast<Abi::ApiShaderType>(s);

        abiProcessor.HasPipelineMetadataEntries(
            GetMetadataHashForApiShader(shaderType, 1),
            GetMetadataHashForApiShader(shaderType, 0),
            &m_info.shader[s].hash.lower);

        abiProcessor.HasPipelineMetadataEntries(
            GetMetadataHashForApiShader(shaderType, 3),
            GetMetadataHashForApiShader(shaderType, 2),
            &m_info.shader[s].hash.upper);
    }

    if (abiProcessor.HasPipelineMetadataEntry(Abi::PipelineMetadataType::ApiHwShaderMappingLo) &&
        abiProcessor.HasPipelineMetadataEntry(Abi::PipelineMetadataType::ApiHwShaderMappingHi))
    {
        m_apiHwMapping.u32Lo =
            abiProcessor.GetPipelineMetadataEntry(Abi::PipelineMetadataType::ApiHwShaderMappingLo);
        m_apiHwMapping.u32Hi =
            abiProcessor.GetPipelineMetadataEntry(Abi::PipelineMetadataType::ApiHwShaderMappingHi);
    }
}

// =====================================================================================================================
// Reports the GPU memory bound to this pipeline.  A pipeline always reports exactly one entry; the entry itself is
// only written when pGpuMemList is non-null.  Returns ErrorInvalidPointer if pNumEntries is null.
Result Pipeline::QueryAllocationInfo(
    size_t*                   pNumEntries,
    GpuMemSubAllocInfo* const pGpuMemList)
{
    Result result = Result::Success;

    if (pNumEntries == nullptr)
    {
        result = Result::ErrorInvalidPointer;
    }
    else
    {
        (*pNumEntries) = 1;

        if (pGpuMemList != nullptr)
        {
            GpuMemSubAllocInfo& entry = pGpuMemList[0];

            entry.offset     = m_gpuMem.Offset();
            entry.pGpuMemory = m_gpuMem.Memory();
            entry.size       = m_gpuMemSize;
        }
    }

    return result;
}

// =====================================================================================================================
// Extracts the binary shader instructions for a specific API shader stage.
//
// With a null pBuffer, this only reports the code size through *pSize.  With a non-null pBuffer, the code is copied
// into it provided *pSize is large enough; otherwise ErrorInvalidMemorySize is returned.  ErrorUnavailable is
// returned when there is no stage info for the requested shader type.
Result Pipeline::GetShaderCode(
    ShaderType shaderType,
    size_t*    pSize,
    void*      pBuffer
    ) const
{
    // NOTE: Once SCPC is pulled out of PAL entirely, clients will be responsible for parsing their Pipeline binary
    // blobs to obtain this data.  Until then, it is extracted from the Pipeline binary stored along with this object.

    Result result = Result::ErrorUnavailable;

    const ShaderStageInfo*const pStage = GetShaderStageInfo(shaderType);
    if (pSize == nullptr)
    {
        result = Result::ErrorInvalidPointer;
    }
    else if (pStage != nullptr)
    {
        PAL_ASSERT(pStage->codeLength != 0); // A stage without any shader code should never have gotten this far.

        if (pBuffer == nullptr)
        {
            // Size query only.
            (*pSize) = pStage->codeLength;
            result   = Result::Success;
        }
        else if ((*pSize) < pStage->codeLength)
        {
            result = Result::ErrorInvalidMemorySize;
        }
        else
        {
            // Re-parse the saved ELF binary and locate the shader's program instructions through the symbol table
            // entry for that shader's entrypoint.
            AbiProcessor elfProcessor(m_pDevice->GetPlatform());
            result = elfProcessor.LoadFromBuffer(m_pPipelineBinary, m_pipelineBinaryLen);
            if (result == Result::Success)
            {
                const auto& entrySymbol = elfProcessor.GetPipelineSymbolEntry(
                        Abi::GetSymbolForStage(Abi::PipelineSymbolType::ShaderMainEntry, pStage->stageId));
                PAL_ASSERT(entrySymbol.size == pStage->codeLength);

                const void* pTextSection   = nullptr;
                size_t      textSectionLen = 0;
                elfProcessor.GetPipelineCode(&pTextSection, &textSectionLen);
                PAL_ASSERT((entrySymbol.size + entrySymbol.value) <= textSectionLen);

                const void*const pShaderStart = VoidPtrInc(pTextSection, static_cast<size_t>(entrySymbol.value));
                memcpy(pBuffer, pShaderStart, static_cast<size_t>(entrySymbol.size));
            }
        }
    }

    return result;
}

// =====================================================================================================================
// Entry point for obtaining a human-readable shader disassembly string for a specific API shader stage.  This
// implementation does not produce any disassembly: it ignores its arguments and always returns ErrorUnavailable.
Result Pipeline::GetShaderDisassembly(
    ShaderType shaderType,
    void*      pBuffer,
    size_t*    pSize
    ) const
{
    return Result::ErrorUnavailable;
}

// =====================================================================================================================
// Copies the performance data of one hardware stage out of this pipeline's GPU memory.
//
// With a null pBuffer, this only reports the size of the data through *pSize.  With a non-null pBuffer which is large
// enough, the GPU memory is mapped, the data is copied out and the memory is unmapped again.  ErrorUnavailable is
// returned if the stage has no performance data or if the caller's buffer is too small.
Result Pipeline::GetPerformanceData(
    Util::Abi::HardwareStage hardwareStage,
    size_t*                  pSize,
    void*                    pBuffer)
{
    const auto& stagePerfData = m_perfDataInfo[static_cast<uint32>(hardwareStage)];

    Result result = Result::ErrorUnavailable;

    if (pSize == nullptr)
    {
        result = Result::ErrorInvalidPointer;
    }
    else if (stagePerfData.sizeInBytes > 0)
    {
        const bool sizeQueryOnly = (pBuffer == nullptr);

        if (sizeQueryOnly)
        {
            (*pSize) = stagePerfData.sizeInBytes;
            result   = Result::Success;
        }
        else if ((*pSize) >= stagePerfData.sizeInBytes)
        {
            void* pMapped = nullptr;
            result = m_gpuMem.Map(&pMapped);

            if (result == Result::Success)
            {
                const void*const pSrc = VoidPtrInc(pMapped, stagePerfData.cpuOffset);
                memcpy(pBuffer, pSrc, stagePerfData.sizeInBytes);

                // The overall result is whatever the unmap reports.
                result = m_gpuMem.Unmap();
            }
        }
    }

    return result;
}

// =====================================================================================================================
// Entry point for adding the shaders associated with this pipeline to the provided shader cache.  Always returns
// ErrorUnavailable.
Result Pipeline::AddShadersToCache(
    IShaderCache* pShaderCache)
{
    PAL_ASSERT(pShaderCache != nullptr);

    // NOTE: There's currently no way to extract entries for a shader cache from a precompiled pipeline binary.
    return Result::ErrorUnavailable;
}

// =====================================================================================================================
// Helper method which extracts shader statistics from the pipeline ELF binary for a particular hardware stage.
//
// *pStats is zeroed first and then filled from the metadata of the saved pipeline binary.  If pStageInfoCopy is
// non-null, the copy shader statistics are filled in from that stage's metadata as well.  Returns the result of
// loading the saved pipeline binary.
Result Pipeline::GetShaderStatsForStage(
    const ShaderStageInfo& stageInfo,
    const ShaderStageInfo* pStageInfoCopy, // Optional: Non-null if we care about copy shader statistics.
    ShaderStats*           pStats
    ) const
{
    PAL_ASSERT(pStats != nullptr);
    memset(pStats, 0, sizeof(ShaderStats));

    // We can re-parse the saved pipeline ELF binary to extract shader statistics.
    AbiProcessor abiProcessor(m_pDevice->GetPlatform());
    Result result = abiProcessor.LoadFromBuffer(m_pPipelineBinary, m_pipelineBinaryLen);
    if (result == Result::Success)
    {
        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumUsedSgprs, stageInfo.stageId),
            &pStats->common.numUsedSgprs);

        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumUsedVgprs, stageInfo.stageId),
            &pStats->common.numUsedVgprs);

        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumAvailSgprs, stageInfo.stageId),
            &pStats->numAvailableSgprs);

        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumAvailVgprs, stageInfo.stageId),
            &pStats->numAvailableVgprs);

        // The LDS and scratch sizes are 32-bit metadata values.  Read each one into a 32-bit local and then assign
        // it, rather than pointing a uint32* at the destination field: that is only correct if the field is exactly
        // 32 bits wide (or the CPU is little-endian and the field was zeroed beforehand), and it breaks the
        // language's aliasing rules.
        uint32 ldsByteSize = 0;
        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderLdsByteSize, stageInfo.stageId),
            &ldsByteSize);
        pStats->common.ldsUsageSizeInBytes = ldsByteSize;

        uint32 scratchByteSize = 0;
        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderScratchByteSize, stageInfo.stageId),
            &scratchByteSize);
        pStats->common.scratchMemUsageInBytes = scratchByteSize;

        pStats->isaSizeInBytes = stageInfo.disassemblyLength;

        if (pStageInfoCopy != nullptr)
        {
            abiProcessor.HasPipelineMetadataEntry(
                Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumUsedSgprs, pStageInfoCopy->stageId),
                &pStats->copyShader.numUsedSgprs);

            abiProcessor.HasPipelineMetadataEntry(
                Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderNumUsedVgprs, pStageInfoCopy->stageId),
                &pStats->copyShader.numUsedVgprs);

            uint32 copyLdsByteSize = 0;
            abiProcessor.HasPipelineMetadataEntry(
                Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderLdsByteSize, pStageInfoCopy->stageId),
                &copyLdsByteSize);
            pStats->copyShader.ldsUsageSizeInBytes = copyLdsByteSize;

            uint32 copyScratchByteSize = 0;
            abiProcessor.HasPipelineMetadataEntry(
                Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderScratchByteSize, pStageInfoCopy->stageId),
                &copyScratchByteSize);
            pStats->copyShader.scratchMemUsageInBytes = copyScratchByteSize;
        }
    }

    return result;
}

// =====================================================================================================================
// Calculates the size, in bytes, of the performance data buffers needed total for the entire pipeline.
size_t Pipeline::PerformanceDataSize(
    const AbiProcessor& abiProcessor
    ) const
{
    size_t dataSize = 0;

    for (uint32 i = 0; i < static_cast<uint32>(Abi::HardwareStage::Count); i++)
    {
        const  Abi::HardwareStage hwStage       = static_cast<Abi::HardwareStage>(i);
        uint32                    perShaderSize = 0;

        abiProcessor.HasPipelineMetadataEntry(
            Abi::GetMetadataForStage(Abi::PipelineMetadataType::ShaderPerformanceDataBufferSize, hwStage),
            &perShaderSize);

        dataSize += perShaderSize;
    }

    return dataSize;
}

// =====================================================================================================================
// Debugging aid which exposes this pipeline's saved ELF binary.
//
// When prints/asserts are enabled, the binary is written to "<pipelineLogDirectory>/<pPrefix>_<name or hash>.elf" if
// the logging settings select this pipeline (matching hash, internal/external filter and ELF format enabled).  When
// GPUOpen support is built in, the binary is additionally registered with the platform's pipeline dump service, if
// there is one.
void Pipeline::DumpPipelineElf(
    const AbiProcessor& abiProcessor,
    const char*         pPrefix
    ) const
{
#if PAL_ENABLE_PRINTS_ASSERTS
    const PalSettings& settings = m_pDevice->Settings();
    const uint64 hashToDump = settings.logPipelineHash;

    // A hash setting of zero means "dump every pipeline".
    const bool hashMatches   = (hashToDump == 0) || (m_info.compilerHash == hashToDump);
    const bool dumpInternal  = TestAnyFlagSet(settings.logPipelines, PipelineLogInternal);
    const bool dumpExternal  = TestAnyFlagSet(settings.logPipelines, PipelineLogExternal);
    const bool dumpElfFormat = TestAnyFlagSet(settings.logPipelines, PipelineLogElfFormat);
    const bool dumpPipeline  =
        (hashMatches && ((dumpExternal && !IsInternal()) || (dumpInternal && IsInternal())) && dumpElfFormat);

    if (dumpPipeline)
    {
        const char*const pLogDir = &settings.pipelineLogDirectory[0];
        const char*const pName   = abiProcessor.GetPipelineName();

        // Prefer the pipeline's name for the file name; fall back to the compiler hash for unnamed pipelines.
        char fileName[512] = { };
        if (pName == nullptr)
        {
            Snprintf(&fileName[0], sizeof(fileName), "%s/%s_0x%016llX.elf", pLogDir, pPrefix, m_info.compilerHash);
        }
        else
        {
            Snprintf(&fileName[0], sizeof(fileName), "%s/%s_%s.elf", pLogDir, pPrefix, pName);
        }

        // The dump is best-effort, but don't attempt to write to a file which could not be opened.
        File file;
        if (file.Open(fileName, FileAccessWrite | FileAccessBinary) == Result::Success)
        {
            file.Write(m_pPipelineBinary, m_pipelineBinaryLen);
        }
    }
#endif

#if PAL_BUILD_GPUOPEN
    PipelineDumpService* pDumpService = m_pDevice->GetPlatform()->GetPipelineDumpService();
    if (pDumpService != nullptr)
    {
        pDumpService->RegisterPipeline(m_pPipelineBinary,
                                       static_cast<uint32>(m_pipelineBinaryLen),
                                       m_info.compilerHash);
    }
#endif
}

} // Pal
